library(ezids)
## Warning in !is.null(rmarkdown::metadata$output) && rmarkdown::metadata$output
## %in% : 'length(x) = 2 > 1' in coercion to 'logical(1)'
# Load the raw telecom customer data from the working directory.
# read.csv() already returns a data.frame, so no data.frame() wrapper is needed.
Telecom_Data <- read.csv("Telecom Data.csv")

# Inspect the column names and types of the freshly loaded data.
str(Telecom_Data)
## 'data.frame':    51047 obs. of  58 variables:
##  $ CustomerID               : int  3000002 3000010 3000014 3000022 3000026 3000030 3000038 3000042 3000046 3000050 ...
##  $ Churn                    : chr  "Yes" "Yes" "No" "No" ...
##  $ MonthlyRevenue           : num  24 17 38 82.3 17.1 ...
##  $ MonthlyMinutes           : int  219 10 8 1312 0 682 26 98 24 1056 ...
##  $ TotalRecurringCharge     : int  22 17 38 75 17 52 30 66 35 75 ...
##  $ DirectorAssistedCalls    : num  0.25 0 0 1.24 0 0.25 0.25 2.48 0 0 ...
##  $ OverageMinutes           : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ RoamingCalls             : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ PercChangeMinutes        : int  -157 -4 -2 157 0 148 60 24 20 43 ...
##  $ PercChangeRevenues       : num  -19 0 0 8.1 -0.2 -3.1 4 6.8 -0.3 2.4 ...
##  $ DroppedCalls             : num  0.7 0.3 0 52 0 9 0 0 0 0 ...
##  $ BlockedCalls             : num  0.7 0 0 7.7 0 1.7 1 0.3 0 0 ...
##  $ UnansweredCalls          : num  6.3 2.7 0 76 0 13 2.3 4 1 0 ...
##  $ CustomerCareCalls        : num  0 0 0 4.3 0 0.7 0 4 0 0 ...
##  $ ThreewayCalls            : num  0 0 0 1.3 0 0 0 0 0 0 ...
##  $ ReceivedCalls            : num  97.2 0 0.4 200.3 0 ...
##  $ OutboundCalls            : num  0 0 0.3 370.3 0 ...
##  $ InboundCalls             : num  0 0 0 147 0 0 0 0 1.7 0 ...
##  $ PeakCallsInOut           : num  58 5 1.3 555.7 0 ...
##  $ OffPeakCallsInOut        : num  24 1 3.7 303.7 0 ...
##  $ DroppedBlockedCalls      : num  1.3 0.3 0 59.7 0 10.7 1 0.3 0 0 ...
##  $ CallForwardingCalls      : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ CallWaitingCalls         : num  0.3 0 0 22.7 0 0.7 0 0 0 0 ...
##  $ MonthsInService          : int  61 58 60 59 53 53 57 59 53 55 ...
##  $ UniqueSubs               : int  2 1 1 2 2 1 2 2 3 1 ...
##  $ ActiveSubs               : int  1 1 1 2 2 1 2 2 3 1 ...
##  $ ServiceArea              : chr  "SEAPOR503" "PITHOM412" "MILMIL414" "PITHOM412" ...
##  $ Handsets                 : int  2 2 1 9 4 3 2 3 4 9 ...
##  $ HandsetModels            : int  2 1 1 4 3 2 2 3 3 5 ...
##  $ CurrentEquipmentDays     : int  361 1504 1812 458 852 231 601 464 544 388 ...
##  $ AgeHH1                   : int  62 40 26 30 46 28 52 46 36 46 ...
##  $ AgeHH2                   : int  0 42 26 0 54 0 58 46 34 68 ...
##  $ ChildrenInHH             : chr  "No" "Yes" "Yes" "No" ...
##  $ HandsetRefurbished       : chr  "No" "No" "No" "No" ...
##  $ HandsetWebCapable        : chr  "Yes" "No" "No" "Yes" ...
##  $ TruckOwner               : chr  "No" "No" "No" "No" ...
##  $ RVOwner                  : chr  "No" "No" "No" "No" ...
##  $ Homeownership            : chr  "Known" "Known" "Unknown" "Known" ...
##  $ BuysViaMailOrder         : chr  "Yes" "Yes" "No" "Yes" ...
##  $ RespondsToMailOffers     : chr  "Yes" "Yes" "No" "Yes" ...
##  $ OptOutMailings           : chr  "No" "No" "No" "No" ...
##  $ NonUSTravel              : chr  "No" "No" "No" "No" ...
##  $ OwnsComputer             : chr  "Yes" "Yes" "No" "No" ...
##  $ HasCreditCard            : chr  "Yes" "Yes" "Yes" "Yes" ...
##  $ RetentionCalls           : int  1 0 0 0 0 0 0 0 0 0 ...
##  $ RetentionOffersAccepted  : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ NewCellphoneUser         : chr  "No" "Yes" "Yes" "Yes" ...
##  $ NotNewCellphoneUser      : chr  "No" "No" "No" "No" ...
##  $ ReferralsMadeBySubscriber: int  0 0 0 0 0 0 0 0 0 0 ...
##  $ IncomeGroup              : int  4 5 6 6 9 1 9 6 9 5 ...
##  $ OwnsMotorcycle           : chr  "No" "No" "No" "No" ...
##  $ AdjustmentsToCreditRating: int  0 0 0 0 1 1 1 0 0 1 ...
##  $ HandsetPrice             : chr  "30" "30" "Unknown" "10" ...
##  $ MadeCallToRetentionTeam  : chr  "Yes" "No" "No" "No" ...
##  $ CreditRating             : chr  "1-Highest" "4-Medium" "3-Good" "4-Medium" ...
##  $ PrizmCode                : chr  "Suburban" "Suburban" "Town" "Other" ...
##  $ Occupation               : chr  "Professional" "Professional" "Crafts" "Other" ...
##  $ MaritalStatus            : chr  "No" "Yes" "Yes" "No" ...
# Recode the churn flag and the occupation label from character to factor so
# they are treated as categorical variables in summaries, plots and tests.
Telecom_Data[["Churn"]] <- factor(Telecom_Data[["Churn"]])
Telecom_Data[["Occupation"]] <- factor(Telecom_Data[["Occupation"]])

# Show the structure again to confirm the two columns are now factors.
str(Telecom_Data)
## 'data.frame':    51047 obs. of  58 variables:
##  $ CustomerID               : int  3000002 3000010 3000014 3000022 3000026 3000030 3000038 3000042 3000046 3000050 ...
##  $ Churn                    : Factor w/ 2 levels "No","Yes": 2 2 1 1 2 1 1 1 1 1 ...
##  $ MonthlyRevenue           : num  24 17 38 82.3 17.1 ...
##  $ MonthlyMinutes           : int  219 10 8 1312 0 682 26 98 24 1056 ...
##  $ TotalRecurringCharge     : int  22 17 38 75 17 52 30 66 35 75 ...
##  $ DirectorAssistedCalls    : num  0.25 0 0 1.24 0 0.25 0.25 2.48 0 0 ...
##  $ OverageMinutes           : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ RoamingCalls             : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ PercChangeMinutes        : int  -157 -4 -2 157 0 148 60 24 20 43 ...
##  $ PercChangeRevenues       : num  -19 0 0 8.1 -0.2 -3.1 4 6.8 -0.3 2.4 ...
##  $ DroppedCalls             : num  0.7 0.3 0 52 0 9 0 0 0 0 ...
##  $ BlockedCalls             : num  0.7 0 0 7.7 0 1.7 1 0.3 0 0 ...
##  $ UnansweredCalls          : num  6.3 2.7 0 76 0 13 2.3 4 1 0 ...
##  $ CustomerCareCalls        : num  0 0 0 4.3 0 0.7 0 4 0 0 ...
##  $ ThreewayCalls            : num  0 0 0 1.3 0 0 0 0 0 0 ...
##  $ ReceivedCalls            : num  97.2 0 0.4 200.3 0 ...
##  $ OutboundCalls            : num  0 0 0.3 370.3 0 ...
##  $ InboundCalls             : num  0 0 0 147 0 0 0 0 1.7 0 ...
##  $ PeakCallsInOut           : num  58 5 1.3 555.7 0 ...
##  $ OffPeakCallsInOut        : num  24 1 3.7 303.7 0 ...
##  $ DroppedBlockedCalls      : num  1.3 0.3 0 59.7 0 10.7 1 0.3 0 0 ...
##  $ CallForwardingCalls      : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ CallWaitingCalls         : num  0.3 0 0 22.7 0 0.7 0 0 0 0 ...
##  $ MonthsInService          : int  61 58 60 59 53 53 57 59 53 55 ...
##  $ UniqueSubs               : int  2 1 1 2 2 1 2 2 3 1 ...
##  $ ActiveSubs               : int  1 1 1 2 2 1 2 2 3 1 ...
##  $ ServiceArea              : chr  "SEAPOR503" "PITHOM412" "MILMIL414" "PITHOM412" ...
##  $ Handsets                 : int  2 2 1 9 4 3 2 3 4 9 ...
##  $ HandsetModels            : int  2 1 1 4 3 2 2 3 3 5 ...
##  $ CurrentEquipmentDays     : int  361 1504 1812 458 852 231 601 464 544 388 ...
##  $ AgeHH1                   : int  62 40 26 30 46 28 52 46 36 46 ...
##  $ AgeHH2                   : int  0 42 26 0 54 0 58 46 34 68 ...
##  $ ChildrenInHH             : chr  "No" "Yes" "Yes" "No" ...
##  $ HandsetRefurbished       : chr  "No" "No" "No" "No" ...
##  $ HandsetWebCapable        : chr  "Yes" "No" "No" "Yes" ...
##  $ TruckOwner               : chr  "No" "No" "No" "No" ...
##  $ RVOwner                  : chr  "No" "No" "No" "No" ...
##  $ Homeownership            : chr  "Known" "Known" "Unknown" "Known" ...
##  $ BuysViaMailOrder         : chr  "Yes" "Yes" "No" "Yes" ...
##  $ RespondsToMailOffers     : chr  "Yes" "Yes" "No" "Yes" ...
##  $ OptOutMailings           : chr  "No" "No" "No" "No" ...
##  $ NonUSTravel              : chr  "No" "No" "No" "No" ...
##  $ OwnsComputer             : chr  "Yes" "Yes" "No" "No" ...
##  $ HasCreditCard            : chr  "Yes" "Yes" "Yes" "Yes" ...
##  $ RetentionCalls           : int  1 0 0 0 0 0 0 0 0 0 ...
##  $ RetentionOffersAccepted  : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ NewCellphoneUser         : chr  "No" "Yes" "Yes" "Yes" ...
##  $ NotNewCellphoneUser      : chr  "No" "No" "No" "No" ...
##  $ ReferralsMadeBySubscriber: int  0 0 0 0 0 0 0 0 0 0 ...
##  $ IncomeGroup              : int  4 5 6 6 9 1 9 6 9 5 ...
##  $ OwnsMotorcycle           : chr  "No" "No" "No" "No" ...
##  $ AdjustmentsToCreditRating: int  0 0 0 0 1 1 1 0 0 1 ...
##  $ HandsetPrice             : chr  "30" "30" "Unknown" "10" ...
##  $ MadeCallToRetentionTeam  : chr  "Yes" "No" "No" "No" ...
##  $ CreditRating             : chr  "1-Highest" "4-Medium" "3-Good" "4-Medium" ...
##  $ PrizmCode                : chr  "Suburban" "Suburban" "Town" "Other" ...
##  $ Occupation               : Factor w/ 8 levels "Clerical","Crafts",..: 5 5 2 4 5 4 7 5 4 5 ...
##  $ MaritalStatus            : chr  "No" "Yes" "Yes" "No" ...
# Per-column summary: quantiles, mean and NA counts for numeric columns,
# level counts for factor columns, and length/class/mode for character columns.
summary(Telecom_Data)
##    CustomerID      Churn       MonthlyRevenue    MonthlyMinutes  
##  Min.   :3000002   No :36336   Min.   :  -6.17   Min.   :   0.0  
##  1st Qu.:3100632   Yes:14711   1st Qu.:  33.61   1st Qu.: 158.0  
##  Median :3201534               Median :  48.46   Median : 366.0  
##  Mean   :3201957               Mean   :  58.83   Mean   : 525.7  
##  3rd Qu.:3305376               3rd Qu.:  71.06   3rd Qu.: 723.0  
##  Max.   :3399994               Max.   :1223.38   Max.   :7359.0  
##                                NA's   :156       NA's   :156     
##  TotalRecurringCharge DirectorAssistedCalls OverageMinutes   
##  Min.   :-11.00       Min.   :  0.0000      Min.   :   0.00  
##  1st Qu.: 30.00       1st Qu.:  0.0000      1st Qu.:   0.00  
##  Median : 45.00       Median :  0.2500      Median :   3.00  
##  Mean   : 46.83       Mean   :  0.8952      Mean   :  40.03  
##  3rd Qu.: 60.00       3rd Qu.:  0.9900      3rd Qu.:  41.00  
##  Max.   :400.00       Max.   :159.3900      Max.   :4321.00  
##  NA's   :156          NA's   :156           NA's   :156      
##   RoamingCalls      PercChangeMinutes  PercChangeRevenues   DroppedCalls    
##  Min.   :   0.000   Min.   :-3875.00   Min.   :-1107.700   Min.   :  0.000  
##  1st Qu.:   0.000   1st Qu.:  -83.00   1st Qu.:   -7.100   1st Qu.:  0.700  
##  Median :   0.000   Median :   -5.00   Median :   -0.300   Median :  3.000  
##  Mean   :   1.236   Mean   :  -11.55   Mean   :   -1.192   Mean   :  6.011  
##  3rd Qu.:   0.300   3rd Qu.:   66.00   3rd Qu.:    1.600   3rd Qu.:  7.700  
##  Max.   :1112.400   Max.   : 5192.00   Max.   : 2483.500   Max.   :221.700  
##  NA's   :156        NA's   :367        NA's   :367                          
##   BlockedCalls     UnansweredCalls  CustomerCareCalls ThreewayCalls    
##  Min.   :  0.000   Min.   :  0.00   Min.   :  0.000   Min.   : 0.0000  
##  1st Qu.:  0.000   1st Qu.:  5.30   1st Qu.:  0.000   1st Qu.: 0.0000  
##  Median :  1.000   Median : 16.30   Median :  0.000   Median : 0.0000  
##  Mean   :  4.086   Mean   : 28.29   Mean   :  1.869   Mean   : 0.2988  
##  3rd Qu.:  3.700   3rd Qu.: 36.30   3rd Qu.:  1.700   3rd Qu.: 0.3000  
##  Max.   :384.300   Max.   :848.70   Max.   :327.300   Max.   :66.0000  
##                                                                        
##  ReceivedCalls    OutboundCalls     InboundCalls     PeakCallsInOut   
##  Min.   :   0.0   Min.   :  0.00   Min.   :  0.000   Min.   :   0.00  
##  1st Qu.:   8.3   1st Qu.:  3.30   1st Qu.:  0.000   1st Qu.:  23.00  
##  Median :  52.8   Median : 13.70   Median :  2.000   Median :  62.00  
##  Mean   : 114.8   Mean   : 25.38   Mean   :  8.178   Mean   :  90.55  
##  3rd Qu.: 153.5   3rd Qu.: 34.00   3rd Qu.:  9.300   3rd Qu.: 121.30  
##  Max.   :2692.4   Max.   :644.30   Max.   :519.300   Max.   :2090.70  
##                                                                       
##  OffPeakCallsInOut DroppedBlockedCalls CallForwardingCalls CallWaitingCalls 
##  Min.   :   0.00   Min.   :  0.00      Min.   : 0.00000    Min.   :  0.000  
##  1st Qu.:  11.00   1st Qu.:  1.70      1st Qu.: 0.00000    1st Qu.:  0.000  
##  Median :  35.70   Median :  5.30      Median : 0.00000    Median :  0.300  
##  Mean   :  67.65   Mean   : 10.16      Mean   : 0.01228    Mean   :  1.841  
##  3rd Qu.:  88.70   3rd Qu.: 12.30      3rd Qu.: 0.00000    3rd Qu.:  1.300  
##  Max.   :1474.70   Max.   :411.70      Max.   :81.30000    Max.   :212.700  
##                                                                             
##  MonthsInService   UniqueSubs        ActiveSubs     ServiceArea       
##  Min.   : 6.00   Min.   :  1.000   Min.   : 0.000   Length:51047      
##  1st Qu.:11.00   1st Qu.:  1.000   1st Qu.: 1.000   Class :character  
##  Median :16.00   Median :  1.000   Median : 1.000   Mode  :character  
##  Mean   :18.76   Mean   :  1.532   Mean   : 1.354                     
##  3rd Qu.:24.00   3rd Qu.:  2.000   3rd Qu.: 2.000                     
##  Max.   :61.00   Max.   :196.000   Max.   :53.000                     
##                                                                       
##     Handsets      HandsetModels    CurrentEquipmentDays     AgeHH1     
##  Min.   : 1.000   Min.   : 1.000   Min.   :  -5.0       Min.   : 0.00  
##  1st Qu.: 1.000   1st Qu.: 1.000   1st Qu.: 205.0       1st Qu.: 0.00  
##  Median : 1.000   Median : 1.000   Median : 329.0       Median :36.00  
##  Mean   : 1.806   Mean   : 1.559   Mean   : 380.5       Mean   :31.34  
##  3rd Qu.: 2.000   3rd Qu.: 2.000   3rd Qu.: 515.0       3rd Qu.:48.00  
##  Max.   :24.000   Max.   :15.000   Max.   :1812.0       Max.   :99.00  
##  NA's   :1        NA's   :1        NA's   :1            NA's   :909    
##      AgeHH2      ChildrenInHH       HandsetRefurbished HandsetWebCapable 
##  Min.   : 0.00   Length:51047       Length:51047       Length:51047      
##  1st Qu.: 0.00   Class :character   Class :character   Class :character  
##  Median : 0.00   Mode  :character   Mode  :character   Mode  :character  
##  Mean   :21.14                                                           
##  3rd Qu.:42.00                                                           
##  Max.   :99.00                                                           
##  NA's   :909                                                             
##   TruckOwner          RVOwner          Homeownership      BuysViaMailOrder  
##  Length:51047       Length:51047       Length:51047       Length:51047      
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##                                                                             
##                                                                             
##                                                                             
##                                                                             
##  RespondsToMailOffers OptOutMailings     NonUSTravel        OwnsComputer      
##  Length:51047         Length:51047       Length:51047       Length:51047      
##  Class :character     Class :character   Class :character   Class :character  
##  Mode  :character     Mode  :character   Mode  :character   Mode  :character  
##                                                                               
##                                                                               
##                                                                               
##                                                                               
##  HasCreditCard      RetentionCalls   RetentionOffersAccepted NewCellphoneUser  
##  Length:51047       Min.   :0.0000   Min.   :0.00000         Length:51047      
##  Class :character   1st Qu.:0.0000   1st Qu.:0.00000         Class :character  
##  Mode  :character   Median :0.0000   Median :0.00000         Mode  :character  
##                     Mean   :0.0372   Mean   :0.01828                           
##                     3rd Qu.:0.0000   3rd Qu.:0.00000                           
##                     Max.   :4.0000   Max.   :3.00000                           
##                                                                                
##  NotNewCellphoneUser ReferralsMadeBySubscriber  IncomeGroup   
##  Length:51047        Min.   : 0.00000          Min.   :0.000  
##  Class :character    1st Qu.: 0.00000          1st Qu.:0.000  
##  Mode  :character    Median : 0.00000          Median :5.000  
##                      Mean   : 0.05207          Mean   :4.325  
##                      3rd Qu.: 0.00000          3rd Qu.:7.000  
##                      Max.   :35.00000          Max.   :9.000  
##                                                               
##  OwnsMotorcycle     AdjustmentsToCreditRating HandsetPrice      
##  Length:51047       Min.   : 0.00000          Length:51047      
##  Class :character   1st Qu.: 0.00000          Class :character  
##  Mode  :character   Median : 0.00000          Mode  :character  
##                     Mean   : 0.05391                            
##                     3rd Qu.: 0.00000                            
##                     Max.   :25.00000                            
##                                                                 
##  MadeCallToRetentionTeam CreditRating        PrizmCode        
##  Length:51047            Length:51047       Length:51047      
##  Class :character        Class :character   Class :character  
##  Mode  :character        Mode  :character   Mode  :character  
##                                                               
##                                                               
##                                                               
##                                                               
##         Occupation    MaritalStatus     
##  Other       :37637   Length:51047      
##  Professional: 8755   Class :character  
##  Crafts      : 1519   Mode  :character  
##  Clerical    :  986                     
##  Self        :  879                     
##  Retired     :  733                     
##  (Other)     :  538
# Split the customers into two data frames by churn outcome.
# which() discards rows whose comparison is NA, matching subset()'s behaviour.
Churned <- Telecom_Data[which(Telecom_Data$Churn == "Yes"), , drop = FALSE]
Retained <- Telecom_Data[which(Telecom_Data$Churn == "No"), , drop = FALSE]
# Histogram of months in service for churned customers.

library(ggplot2)

# The bar fill is a fixed colour set inside geom_histogram(); a constant set
# there overrides any mapped fill, so only the x aesthetic is mapped here.
# The number of bins is deliberately left at the ggplot2 default.
ggplot(Churned, aes(x = MonthsInService)) +
  geom_histogram(
    position = "identity",
    alpha = 0.6,
    color = "aquamarine4",
    fill = "aquamarine3"
  ) +
  xlab("Service period for churned customers (In Months) ") +
  ylab("Frequency") +
  theme_classic()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

#+ggtitle("Service Months Distribution for Churned customers")

# Side-by-side bar chart of customer counts per Prizm code, split by churn.
ggplot(data = Telecom_Data, mapping = aes(x = PrizmCode, fill = Churn)) +
  geom_bar(position = "dodge2") +
  labs(title = "Churn distribution for Prizm code")

#install.packages("plotly")
library(plotly)
## 
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## The following object is masked from 'package:stats':
## 
##     filter
## The following object is masked from 'package:graphics':
## 
##     layout
# Slice colours shared by both pie charts.
colors <- c(
  "rgb(211,94,96)", "rgb(128,133,133)", "rgb(144,103,167)",
  "rgb(171,104,87)", "rgb(114,147,203)"
)

# Build a pie chart of the PrizmCode distribution of `data`.
#
# The counts are computed explicitly with table(). The data has no column
# named `n`, so passing `data$n` as `values` would be NULL and the chart would
# depend on plotly implicitly counting label occurrences. table() also orders
# the labels alphabetically, so each Prizm code gets the same colour in every
# chart built with this helper.
#
# data:         data frame with a PrizmCode column.
# slice_colors: character vector of colours, assigned to labels in order.
# Returns a plotly pie chart object.
prizm_pie <- function(data, slice_colors = colors) {
  prizm_counts <- table(data$PrizmCode)
  plot_ly(
    type = "pie",
    labels = names(prizm_counts),
    values = as.vector(prizm_counts),
    textinfo = "label+percent",
    insidetextorientation = "radial",
    marker = list(
      colors = slice_colors,
      line = list(color = "#FFFFFF", width = 1)
    )
  )
}

# Prizm code shares among churned customers.
fig <- prizm_pie(Churned)
fig

# Prizm code shares among retained customers.
fig_1 <- prizm_pie(Retained)
fig_1
# Bar chart of the number of customers in each occupation category.
ggplot(data = Telecom_Data, mapping = aes(x = Occupation)) +
  geom_bar(fill = "bisque") +
  labs(title = "Frequency distribution of occupation")

# Cross-tabulate occupation (rows) against churn outcome (columns).
Occupation_Churn <- table(
  Telecom_Data[["Occupation"]],
  Telecom_Data[["Churn"]]
)
str(Occupation_Churn)
##  'table' int [1:8, 1:2] 697 1093 106 26705 6288 548 636 263 289 426 ...
##  - attr(*, "dimnames")=List of 2
##   ..$ : chr [1:8] "Clerical" "Crafts" "Homemaker" "Other" ...
##   ..$ : chr [1:2] "No" "Yes"
# Pearson chi-squared test of independence on the occupation-by-churn table
# (null hypothesis: churn does not depend on occupation).
chisq_test <- chisq.test(x = Occupation_Churn)
chisq_test
## 
##  Pearson's Chi-squared test
## 
## data:  Occupation_Churn
## X-squared = 10.316, df = 7, p-value = 0.1714
# Pull the p-value out of the test result and display it.
p_value <- chisq_test[["p.value"]]
p_value
## [1] 0.1713543

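The p-value (0.171) is greater than 0.05, so we fail to reject the null hypothesis: the data show no statistically significant association between occupation and churn.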

# Normal QQ plots of dropped and blocked calls, drawn separately for churned
# and retained customers.

# Draw a normal QQ plot of `values` and overlay a green reference line.
qq_with_line <- function(values, axis_label, point_col, plot_title) {
  qqnorm(values, xlab = axis_label, col = point_col, main = plot_title)
  qqline(values, col = "green")
  invisible(NULL)
}

qq_with_line(
  Churned$DroppedCalls, "DroppedCalls", "blue",
  "A QQ Plot of dropped calls for churned customers"
)

qq_with_line(
  Retained$DroppedCalls, "DroppedCalls", "blue",
  "A QQ Plot of dropped calls for retained customers"
)

qq_with_line(
  Churned$BlockedCalls, "BlockedCalls", "orange",
  "A QQ Plot of blocked calls for churned customers"
)

qq_with_line(
  Retained$BlockedCalls, "BlockedCalls", "orange",
  "A QQ Plot of blocked calls for retained customers"
)